FALSE Registered S3 methods overwritten by 'dbplyr':
FALSE method from
FALSE print.tbl_lazy
FALSE print.tbl_sql
FALSE -- Attaching packages -------------------------------------------------------------------------------------------------------- tidyverse 1.3.1 --
FALSE v ggplot2 3.3.5 v purrr 0.3.4
FALSE v tibble 3.1.6 v dplyr 1.0.7
FALSE v tidyr 1.1.4 v stringr 1.4.0
FALSE v readr 2.1.1 v forcats 0.5.1
FALSE -- Conflicts ----------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
FALSE x dplyr::filter() masks stats::filter()
FALSE x dplyr::lag() masks stats::lag()
FALSE
FALSE Attaching package: ‘scales’
FALSE
FALSE The following object is masked from ‘package:purrr’:
FALSE
FALSE discard
FALSE
FALSE The following object is masked from ‘package:readr’:
FALSE
FALSE col_factor
FALSE
FALSE Registered S3 method overwritten by 'data.table':
FALSE method from
FALSE print.data.table
FALSE Registered S3 method overwritten by 'htmlwidgets':
FALSE method from
FALSE print.htmlwidget tools:rstudio
FALSE
FALSE Attaching package: ‘plotly’
FALSE
FALSE The following object is masked from ‘package:ggplot2’:
FALSE
FALSE last_plot
FALSE
FALSE The following object is masked from ‘package:stats’:
FALSE
FALSE filter
FALSE
FALSE The following object is masked from ‘package:graphics’:
FALSE
FALSE layout
FALSE
FALSE data.table 1.14.2 using 4 threads (see ?getDTthreads). Latest news: r-datatable.com
FALSE
FALSE Attaching package: ‘data.table’
FALSE
FALSE The following objects are masked from ‘package:dplyr’:
FALSE
FALSE between, first, last
FALSE
FALSE The following object is masked from ‘package:purrr’:
FALSE
FALSE transpose
FALSE
FALSE
FALSE Attaching package: ‘lubridate’
FALSE
FALSE The following objects are masked from ‘package:data.table’:
FALSE
FALSE hour, isoweek, mday, minute, month, quarter, second, wday, week, yday, year
FALSE
FALSE The following objects are masked from ‘package:base’:
FALSE
FALSE date, intersect, setdiff, union
FALSE
FALSE Loading required package: kableExtra
FALSE
FALSE Attaching package: ‘kableExtra’
FALSE
FALSE The following object is masked from ‘package:dplyr’:
FALSE
FALSE group_rows
FALSE
FALSE
FALSE Attaching package: ‘timetk’
FALSE
FALSE The following object is masked from ‘package:data.table’:
FALSE
FALSE :=
FALSE
FALSE Loading required package: svd
FALSE Loading required package: forecast
FALSE Registered S3 method overwritten by 'quantmod':
FALSE method from
FALSE as.zoo.data.frame zoo
FALSE
FALSE Attaching package: ‘Rssa’
FALSE
FALSE The following object is masked from ‘package:stats’:
FALSE
FALSE decompose
covid_NAs <- covid %>%
group_by(location) %>%
summarise_all(funs(sum(is.na(.)))) %>%
pivot_longer(cols = -location, names_to = "Variable", values_to = "NAs") %>%
mutate(Percent = round(NAs / nrow(covid) * 100 ,2)) %>%
arrange(-NAs)
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
covid_NAs %>%
group_by(location) %>%
summarise(total_pct_na = sum(Percent)) %>%
arrange(total_pct_na) %>%
datatable(filter = 'top')
#Helper function for filtering data
my_data <- function(country_covid_filter, country_gisaid_filter){
data <- covid %>%
filter(location == country_covid_filter)
gisaid_data <- gisaid_variants %>%
filter(Country == country_gisaid_filter)
data <- left_join(data, gisaid_data, by = "date")
data
}
us <- my_data("United States", "USA")
variants_plot <- ggplot(data = us, aes(x = date)) +
geom_area(aes(y = perc_sequences, color = variant, fill = variant), position = "dodge", show.legend = FALSE) +
theme_minimal()
cases_plot <-
ggplot(data = us, aes(x = date)) +
geom_line(aes(y = new_cases_per_million), show.legend = FALSE) +
geom_line(aes(y = new_deaths_per_million)) +
theme_minimal()
deaths_plot <- ggplot(data = us, aes(x = date)) +
geom_line(aes(y = new_deaths_per_million)) +
theme_minimal()
vaccinations_plot <- ggplot(us, aes(x = date)) +
geom_line(aes(y = new_vaccinations_smoothed_per_million)) +
theme_minimal()
variants_cases_plot <- ggplot(data = us, aes(x = date)) +
geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") +
geom_line(aes(y = new_cases_smoothed_per_million / 40)) +
scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~.*40, name = "New Cases Per Million")) +
theme_minimal() +
labs(title = "Proportion of Covid Variants vs New Cases Per Million") +
annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")
variants_deaths_plot <- ggplot(data = us, aes(x = date)) +
geom_area(aes(y = perc_sequences, color = variant, fill = variant),show.legend =FALSE, alpha = 0.5, position = "dodge") +
geom_line(aes(y = new_deaths_smoothed_per_million*5)) +
scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~./5, name = "New Deaths Per Million")) +
theme_minimal() +
labs(title = "Proportion of Covid Variants vs New Deaths Per Million") +
annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")
cases_vaccinations_plot <- ggplot(data = us, aes(x = date)) +
geom_line(aes(y = new_cases_smoothed_per_million), show.legend = FALSE) +
geom_line(aes(y = people_vaccinated_per_hundred*50)) +
scale_y_continuous("New Cases Per Million", sec.axis=sec_axis(~./50, name = "People Vaccinated Per Hundred")) +
theme_minimal() +
labs(title = "New Cases Per Million vs. People Vaccinated Per Hundred")
deaths_vaccinations_plot <- ggplot(us, aes(x = date)) +
geom_line(aes(y = new_deaths_per_million), show.legend = FALSE) +
geom_line(aes(y = people_fully_vaccinated_per_hundred/7)) +
scale_y_continuous("New Deaths Per Million", sec.axis=sec_axis(~.*7, name = "People Vaccinated Per Hundred")) +
theme_minimal() +
labs(title = "New Deaths Per Million vs. People Fully Vaccinated Per Hundred")
variants_hospitalizations_plot <- ggplot(us, aes(x = date)) +
geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") +
geom_line(aes(y = weekly_hosp_admissions_per_million / 5)) +
scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~.*5, name = "Hospitalizations Per Million")) +
theme_minimal() +
labs(title = "Proportion of Covid Variants vs Hospitalizations Per Million") +
annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")
variants_vaccinations_plot <- ggplot(us, aes(x = date)) +
geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") +
geom_line(aes(y = people_fully_vaccinated_per_hundred)) +
scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~., name = "People Vaccinated Per Hundred")) +
theme_minimal() +
labs(title = "Proportion of Covid Variants vs People Fully Vaccinated") +
annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")
variants_vaccinations_plot
vaccinations_plot
Warning: Removed 404 row(s) containing missing values (geom_path).

variants_cases_plot <- ggplot(data = us, aes(x = date)) +
geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") +
geom_line(aes(y = new_cases_smoothed_per_million / 40)) +
scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~.*40, name = "New Cases Per Million")) +
theme_minimal() +
labs(title = "Proportion of Covid Variants vs New Cases Per Million") +
annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")
variants_cases_plot
Warning: Width not defined. Set with `position_dodge(width = ?)`
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning: Removed 7 row(s) containing missing values (geom_path).

variants_hospitalizations_plot
Warning: Width not defined. Set with `position_dodge(width = ?)`
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning: Removed 206 row(s) containing missing values (geom_path).

variants_deaths_plot
Warning: Width not defined. Set with `position_dodge(width = ?)`
Warning in max(ids, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning: Removed 45 row(s) containing missing values (geom_path).

Time Series Analysis
us %>%
plot_time_series(date, new_cases_smoothed_per_million)
us %>%
plot_acf_diagnostics(date, new_cases_smoothed_per_million, .show_white_noise_bars = T)
gisaid_variants %>%
filter(Country == "USA", variant %in% c("VOC Omicron GRA (B.1.1.529+BA.*) ", "VOC Delta GK (B.1.617.2+AY.*) ", "VOC Alpha GRY (B.1.1.7+Q.*) ")) %>%
plot_time_series(date, perc_sequences, .facet_vars=variant, .legend_show = FALSE)
gisaid_variants %>%
filter(Country == "USA", variant %in% c("VOC Omicron GRA (B.1.1.529+BA.*) ", "VOC Delta GK (B.1.617.2+AY.*) ", "VOC Alpha GRY (B.1.1.7+Q.*) ")) %>%
group_by(variant) %>%
plot_acf_diagnostics(date, perc_sequences, .show_white_noise_bars = T)
Max lag exceeds data available. Using max lag: 73
Max lag exceeds data available. Using max lag: 80
Max lag exceeds data available. Using max lag: 22
library("sf")
Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library("rnaturalearth")
library("rnaturalearthdata")
world <- ne_countries(scale = "medium", returnclass = "sf")
class(world)
[1] "sf" "data.frame"
unique(gisaid$Country)
[1] "Afghanistan" "Albania" "Algeria" "American Samoa"
[5] "Andorra" "Angola" "Anguilla" "Antigua and Barbuda"
[9] "Argentina" "Armenia" "Aruba" "Australia"
[13] "Austria" "Azerbaijan" "Bahrain" "Bangladesh"
[17] "Barbados" "Belarus" "Belgium" "Belize"
[21] "Benin" "Bermuda" "Bolivia" "Bonaire"
[25] "Bosnia and Herzegovina" "Botswana" "Brazil" "British Virgin Islands"
[29] "Brunei" "Bulgaria" "Burkina Faso" "Burundi"
[33] "Cabo Verde" "Cambodia" "Cameroon" "Canada"
[37] "Canary Islands" "Cayman Islands" "Central African Republic" "Chad"
[41] "Chile" "China" "Colombia" "Comoros"
[45] "Costa Rica" "Cote d'Ivoire" "Crimea" "Croatia"
[49] "Cuba" "Curacao" "Cyprus" "Czech Republic"
[53] "Democratic Republic of the Congo" "Denmark" "Djibouti" "Dominica"
[57] "Dominican Republic" "Ecuador" "Egypt" "El Salvador"
[61] "Equatorial Guinea" "Estonia" "Eswatini" "Ethiopia"
[65] "Faroe Islands" "Fiji" "Finland" "France"
[69] "French Guiana" "French Polynesia" "Gabon" "Gambia"
[73] "Georgia" "Germany" "Ghana" "Gibraltar"
[77] "Greece" "Grenada" "Guadeloupe" "Guam"
[81] "Guatemala" "Guinea" "Guinea-Bissau" "Guyana"
[85] "Haiti" "Honduras" "Hong Kong" "Hungary"
[89] "Iceland" "India" "Indonesia" "Iran"
[93] "Iraq" "Ireland" "Israel" "Italy"
[97] "Jamaica" "Japan" "Jordan" "Kazakhstan"
[101] "Kenya" "Kosovo" "Kuwait" "Kyrgyzstan"
[105] "Laos" "Latvia" "Lebanon" "Lesotho"
[109] "Liberia" "Libya" "Liechtenstein" "Lithuania"
[113] "Luxembourg" "Madagascar" "Malawi" "Malaysia"
[117] "Maldives" "Mali" "Malta" "Martinique"
[121] "Mauritania" "Mauritius" "Mayotte" "Mexico"
[125] "Moldova" "Monaco" "Mongolia" "Montenegro"
[129] "Montserrat" "Morocco" "Mozambique" "Myanmar"
[133] "Namibia" "Nepal" "Netherlands" "New Caledonia"
[137] "New Zealand" "Nicaragua" "Niger" "Nigeria"
[141] "North Macedonia" "Northern Mariana Islands" "Norway" "Oman"
[145] "Pakistan" "Palau" "Palestine" "Panama"
[149] "Papua New Guinea" "Paraguay" "Peru" "Philippines"
[153] "Poland" "Portugal" "Puerto Rico" "Qatar"
[157] "Republic of the Congo" "Reunion" "Romania" "Russia"
[161] "Rwanda" "Saint Barthelemy" "Saint Kitts and Nevis" "Saint Lucia"
[165] "Saint Martin" "Saint Vincent and the Grenadines" "Sao Tome and Principe" "Saudi Arabia"
[169] "Senegal" "Serbia" "Seychelles" "Sierra Leone"
[173] "Singapore" "Sint Eustatius" "Sint Maarten" "Slovakia"
[177] "Slovenia" "Solomon Islands" "Somalia" "South Africa"
[181] "South Korea" "South Sudan" "Spain" "Sri Lanka"
[185] "Sudan" "Suriname" "Sweden" "Switzerland"
[189] "Syria" "Taiwan" "Tanzania" "Thailand"
[193] "The Bahamas" "Timor-Leste" "Togo" "Trinidad and Tobago"
[197] "Tunisia" "Turkey" "Turks and Caicos Islands" "U.S. Virgin Islands"
[201] "USA" "Uganda" "Ukraine" "United Arab Emirates"
[205] "United Kingdom" "Uruguay" "Uzbekistan" "Vanuatu"
[209] "Venezuela" "Vietnam" "Wallis and Futuna Islands" "Zambia"
[213] "Zimbabwe"
unique(covid$location)
[1] "Afghanistan" "Africa" "Albania" "Algeria"
[5] "Andorra" "Angola" "Anguilla" "Antigua and Barbuda"
[9] "Argentina" "Armenia" "Aruba" "Asia"
[13] "Australia" "Austria" "Azerbaijan" "Bahamas"
[17] "Bahrain" "Bangladesh" "Barbados" "Belarus"
[21] "Belgium" "Belize" "Benin" "Bermuda"
[25] "Bhutan" "Bolivia" "Bonaire Sint Eustatius and Saba" "Bosnia and Herzegovina"
[29] "Botswana" "Brazil" "British Virgin Islands" "Brunei"
[33] "Bulgaria" "Burkina Faso" "Burundi" "Cambodia"
[37] "Cameroon" "Canada" "Cape Verde" "Cayman Islands"
[41] "Central African Republic" "Chad" "Chile" "China"
[45] "Colombia" "Comoros" "Congo" "Cook Islands"
[49] "Costa Rica" "Cote d'Ivoire" "Croatia" "Cuba"
[53] "Curacao" "Cyprus" "Czechia" "Democratic Republic of Congo"
[57] "Denmark" "Djibouti" "Dominica" "Dominican Republic"
[61] "Ecuador" "Egypt" "El Salvador" "Equatorial Guinea"
[65] "Eritrea" "Estonia" "Eswatini" "Ethiopia"
[69] "Europe" "European Union" "Faeroe Islands" "Falkland Islands"
[73] "Fiji" "Finland" "France" "French Polynesia"
[77] "Gabon" "Gambia" "Georgia" "Germany"
[81] "Ghana" "Gibraltar" "Greece" "Greenland"
[85] "Grenada" "Guam" "Guatemala" "Guernsey"
[89] "Guinea" "Guinea-Bissau" "Guyana" "Haiti"
[93] "High income" "Honduras" "Hong Kong" "Hungary"
[97] "Iceland" "India" "Indonesia" "International"
[101] "Iran" "Iraq" "Ireland" "Isle of Man"
[105] "Israel" "Italy" "Jamaica" "Japan"
[109] "Jersey" "Jordan" "Kazakhstan" "Kenya"
[113] "Kiribati" "Kosovo" "Kuwait" "Kyrgyzstan"
[117] "Laos" "Latvia" "Lebanon" "Lesotho"
[121] "Liberia" "Libya" "Liechtenstein" "Lithuania"
[125] "Low income" "Lower middle income" "Luxembourg" "Macao"
[129] "Madagascar" "Malawi" "Malaysia" "Maldives"
[133] "Mali" "Malta" "Marshall Islands" "Mauritania"
[137] "Mauritius" "Mexico" "Micronesia (country)" "Moldova"
[141] "Monaco" "Mongolia" "Montenegro" "Montserrat"
[145] "Morocco" "Mozambique" "Myanmar" "Namibia"
[149] "Nauru" "Nepal" "Netherlands" "New Caledonia"
[153] "New Zealand" "Nicaragua" "Niger" "Nigeria"
[157] "Niue" "North America" "North Macedonia" "Northern Cyprus"
[161] "Northern Mariana Islands" "Norway" "Oceania" "Oman"
[165] "Pakistan" "Palau" "Palestine" "Panama"
[169] "Papua New Guinea" "Paraguay" "Peru" "Philippines"
[173] "Pitcairn" "Poland" "Portugal" "Puerto Rico"
[177] "Qatar" "Romania" "Russia" "Rwanda"
[181] "Saint Helena" "Saint Kitts and Nevis" "Saint Lucia" "Saint Pierre and Miquelon"
[185] "Saint Vincent and the Grenadines" "Samoa" "San Marino" "Sao Tome and Principe"
[189] "Saudi Arabia" "Senegal" "Serbia" "Seychelles"
[193] "Sierra Leone" "Singapore" "Sint Maarten (Dutch part)" "Slovakia"
[197] "Slovenia" "Solomon Islands" "Somalia" "South Africa"
[201] "South America" "South Korea" "South Sudan" "Spain"
[205] "Sri Lanka" "Sudan" "Suriname" "Sweden"
[209] "Switzerland" "Syria" "Taiwan" "Tajikistan"
[213] "Tanzania" "Thailand" "Timor" "Togo"
[217] "Tokelau" "Tonga" "Trinidad and Tobago" "Tunisia"
[221] "Turkey" "Turkmenistan" "Turks and Caicos Islands" "Tuvalu"
[225] "Uganda" "Ukraine" "United Arab Emirates" "United Kingdom"
[229] "United States" "United States Virgin Islands" "Upper middle income" "Uruguay"
[233] "Uzbekistan" "Vanuatu" "Vatican" "Venezuela"
[237] "Vietnam" "Wallis and Futuna" "World" "Yemen"
[241] "Zambia" "Zimbabwe"
world_variants <- gisaid %>%
group_by(Country) %>%
mutate(Country = case_when(
Country == "USA" ~ "United States",
TRUE ~ as.character(Country)
)) %>%
summarise(most_recent_date = date[n()],
prevalent_variant = variant[date == date[n()] & Type == "Variant" & perc_sequences == max(perc_sequences)]) %>%
#prevalent_lineage = variant[date == date[n()] & Type == "Lineage" & perc_sequences == max(perc_sequences)]) %>%
rename(location = Country)
`summarise()` has grouped output by 'Country'. You can override using the `.groups` argument.
world_variants <- left_join(world_variants, covid[, c("location", "iso_code")], by = "location", all.x = TRUE) %>%
rename(gu_a3 = iso_code)
world_variants_map <- left_join(world, world_variants, by = "gu_a3", all.x = TRUE)
p <- ggplot(data = world_variants_map, aes(fill = prevalent_variant)) +
geom_sf(show.legend = FALSE) +
xlab("Longitude") +
ylab("Latitude") +
theme(panel.grid.major = element_line(color = gray(.5), linetype = "dashed", size = 0.5), panel.background = element_rect(fill = "aliceblue"))
#p
---
title: "COVID-19 Variants Analysis"
output: html_notebook
---

```{r, include = FALSE}
#knitr::opts_chunk$set(echo = FALSE, warning = FALSE)
```

```{r, echo = FALSE, message=FALSE, warning=FALSE, comment=FALSE}
library(tidyverse)
library(scales)
library(plotly)
library(DT)
library(data.table)
library(gtable)
library(plotly)
library(lubridate)
library(vtable)
library(rjson)
library(timetk)
library(Rssa)
```

```{r, echo = FALSE, warning=FALSE}
covid <- read_csv("data/owid-covid-data.csv",show_col_types = FALSE)
#variants <- read_csv("data/covid-variants.csv",show_col_types = FALSE)

gisaid <- as.data.frame(fread("data/gisaid_variants_statistics.tsv")) %>% 
  rename(date = `Week prior to`,
         count = `Submission Count`,
         perc_sequences = `% per Country and Week`,
         total = `Total per Country and Week`,
         variant = Value) %>% 
  mutate(date = ymd(date),
         perc_sequences = round(count / total * 100, 3))# %>% 
#  separate(variant, into = c("variant", "origin"), sep = c("first detected in "))

gisaid_variants <- gisaid %>% 
  filter(Type == "Variant") %>%
  separate(variant, c("variant", "origin"), sep = "first detected in ") %>% 
  select(-Type)
```

```{r}
covid_NAs <- covid %>% 
  group_by(location) %>% 
  summarise_all(funs(sum(is.na(.)))) %>% 
  pivot_longer(cols = -location, names_to = "Variable", values_to = "NAs") %>% 
  mutate(Percent = round(NAs / nrow(covid) * 100 ,2)) %>% 
  arrange(-NAs)
```

```{r}
covid_NAs %>% 
  group_by(location) %>% 
  summarise(total_pct_na = sum(Percent)) %>% 
  arrange(total_pct_na) %>% 
  datatable(filter = 'top')
```

```{r}
#Helper function for filtering data
my_data <- function(country_covid_filter, country_gisaid_filter){
  data <- covid %>% 
    filter(location == country_covid_filter)
  gisaid_data <- gisaid_variants %>% 
    filter(Country == country_gisaid_filter)
  data <- left_join(data, gisaid_data, by = "date")
  data
}

us <- my_data("United States", "USA")
```


```{r "US Plots"}
variants_plot <- ggplot(data = us, aes(x = date)) +
  geom_area(aes(y = perc_sequences, color = variant, fill = variant), position = "dodge", show.legend = FALSE) +
  theme_minimal()


cases_plot <-
  ggplot(data = us, aes(x = date)) +
  geom_line(aes(y = new_cases_per_million), show.legend = FALSE) +
  geom_line(aes(y = new_deaths_per_million)) + 
  theme_minimal()


deaths_plot <- ggplot(data = us, aes(x = date)) +
  geom_line(aes(y = new_deaths_per_million)) + 
  theme_minimal()


vaccinations_plot <- ggplot(us, aes(x = date)) +
  geom_line(aes(y = new_vaccinations_smoothed_per_million)) +
  theme_minimal()


variants_cases_plot <- ggplot(data = us, aes(x = date)) + 
  geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") + 
  geom_line(aes(y = new_cases_smoothed_per_million / 40)) + 
  scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~.*40, name = "New Cases Per Million")) + 
  theme_minimal() + 
  labs(title = "Proportion of Covid Variants vs New Cases Per Million")  +
  annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
  annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
  annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")


variants_deaths_plot <- ggplot(data = us, aes(x = date)) + 
  geom_area(aes(y = perc_sequences, color = variant, fill = variant),show.legend =FALSE, alpha = 0.5, position = "dodge") + 
  geom_line(aes(y = new_deaths_smoothed_per_million*5)) + 
  scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~./5, name = "New Deaths Per Million")) + 
  theme_minimal() + 
  labs(title = "Proportion of Covid Variants vs New Deaths Per Million") +
  annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
  annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
  annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")


cases_vaccinations_plot <- ggplot(data = us, aes(x = date)) +
  geom_line(aes(y = new_cases_smoothed_per_million), show.legend = FALSE) +
  geom_line(aes(y = people_vaccinated_per_hundred*50)) + 
  scale_y_continuous("New Cases Per Million", sec.axis=sec_axis(~./50, name = "People Vaccinated Per Hundred")) + 
  theme_minimal() + 
  labs(title = "New Cases Per Million vs. People Vaccinated Per Hundred")

deaths_vaccinations_plot <- ggplot(us, aes(x = date)) +
  geom_line(aes(y = new_deaths_per_million), show.legend = FALSE) +
  geom_line(aes(y = people_fully_vaccinated_per_hundred/7)) + 
  scale_y_continuous("New Deaths Per Million", sec.axis=sec_axis(~.*7, name = "People Vaccinated Per Hundred")) + 
  theme_minimal() + 
  labs(title = "New Deaths Per Million vs. People Fully Vaccinated Per Hundred")


variants_hospitalizations_plot <- ggplot(us, aes(x = date)) + 
  geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") + 
  geom_line(aes(y = weekly_hosp_admissions_per_million / 5)) + 
  scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~.*5, name = "Hospitalizations Per Million")) + 
  theme_minimal() + 
  labs(title = "Proportion of Covid Variants vs Hospitalizations Per Million") +
  annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
  annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
  annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")

variants_vaccinations_plot <- ggplot(us, aes(x = date)) + 
  geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") + 
  geom_line(aes(y = people_fully_vaccinated_per_hundred)) + 
  scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~., name = "People Vaccinated Per Hundred")) + 
  theme_minimal() + 
  labs(title = "Proportion of Covid Variants vs People Fully Vaccinated") +
  annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
  annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
  annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")

variants_vaccinations_plot
```

```{r}
vaccinations_plot
```

```{r}
variants_cases_plot <- ggplot(data = us, aes(x = date)) + 
  geom_area(aes(y = perc_sequences, color = variant, fill = variant), show.legend =FALSE, alpha = 0.5, position = "dodge") + 
  geom_line(aes(y = new_cases_smoothed_per_million / 40)) + 
  scale_y_continuous("Percent of Sequences", sec.axis=sec_axis(~.*40, name = "New Cases Per Million")) + 
  theme_minimal() + 
  labs(title = "Proportion of Covid Variants vs New Cases Per Million") +
  annotate(geom="label", x=ymd(20200701), y=75, label="Alpha/Other") +
  annotate(geom="label", x=ymd(20210901), y=75, label="Delta") +
  annotate(geom="label", x=ymd(20220201), y=75, label="Omicron")

variants_cases_plot
```

```{r}
variants_hospitalizations_plot
```

```{r}
variants_deaths_plot
```


# Time Series Analysis

```{r}
us %>% 
  plot_time_series(date, new_cases_smoothed_per_million)
```

```{r}
us %>% 
  plot_acf_diagnostics(date, new_cases_smoothed_per_million, .show_white_noise_bars = T) 
```


```{r}
gisaid_variants %>% 
  filter(Country == "USA", variant %in% c("VOC Omicron GRA (B.1.1.529+BA.*) ", "VOC Delta GK (B.1.617.2+AY.*) ", "VOC Alpha GRY (B.1.1.7+Q.*) ")) %>% 
  plot_time_series(date, perc_sequences, .facet_vars=variant, .legend_show = FALSE)
```

```{r}
gisaid_variants %>% 
  filter(Country == "USA", variant %in% c("VOC Omicron GRA (B.1.1.529+BA.*) ", "VOC Delta GK (B.1.617.2+AY.*) ", "VOC Alpha GRY (B.1.1.7+Q.*) ")) %>% 
  group_by(variant) %>% 
  plot_acf_diagnostics(date, perc_sequences, .show_white_noise_bars = T) 
```

```{r}
library("sf")
library("rnaturalearth")
library("rnaturalearthdata")

world <- ne_countries(scale = "medium", returnclass = "sf")
class(world)
```

```{r}
unique(gisaid$Country)
```

```{r}
unique(covid$location)
```


```{r}
world_variants <- gisaid %>% 
  group_by(Country) %>% 
  mutate(Country = case_when(
  Country == "USA" ~ "United States",
  TRUE ~ as.character(Country)
  )) %>% 
  summarise(most_recent_date = date[n()], 
            prevalent_variant = variant[date == date[n()] & Type == "Variant" & perc_sequences == max(perc_sequences)]) %>% 
            #prevalent_lineage = variant[date == date[n()] & Type == "Lineage" & perc_sequences == max(perc_sequences)]) %>% 
  rename(location = Country)

world_variants <- left_join(world_variants, covid[, c("location", "iso_code")], by = "location", all.x = TRUE) %>% 
  rename(gu_a3 = iso_code)

world_variants_map <- left_join(world, world_variants, by = "gu_a3", all.x = TRUE)
```

```{r}
p <- ggplot(data = world_variants_map, aes(fill = prevalent_variant)) + 
  geom_sf(show.legend = FALSE) + 
  xlab("Longitude") + 
  ylab("Latitude") + 
  theme(panels.grid.major = element_line(color = gray(.5), linetype = "dashed", size = 0.5), panel.background = element_rect(fill = "aliceblue")) 
```

```{r}
#p
```


